The purpose of this notebook is to apply PCHA to the single-cell cell line data.
Reading in preprocessed (MAGIC-imputed) data from 2-Human_cell_lines_scPCHA.ipynb
# file = "../../out/cell-lines/X_magic_for_R.h5ad"
# Convert(file, dest = "h5seurat", overwrite = TRUE)
# data <- LoadH5Seurat("../../out/cell-lines/X_magic_for_R.h5seurat")
# Idents(data) = data@meta.data$cline
# Elbow plot of the PCA; it indicates the top 8 PCs are important.
ElbowPlot(object = data)
# Cells in PCA space, with labels drawn on the plot.
DimPlot(object = data, reduction = "pca", label = TRUE)
# Each PC's standard deviation as a percentage of the summed standard deviations.
pct <- local({
  pc_sd <- data[["pca"]]@stdev
  pc_sd / sum(pc_sd) * 100
})
# Running total of those percentages across PCs.
cumu <- cumsum(pct)
# First PC whose cumulative percent exceeds 90 while its own percent is below 5.
co1 <- which(pct < 5 & cumu > 90)[1]
print(co1) # 38
[1] 38
# Determine the difference between the variation of each PC and the subsequent PC.
# Negative indexing pairs PC i with PC i + 1 explicitly, without depending on how
# `:` and `-` bind relative to each other or on R silently dropping a 0 index.
co2 <- rev(which(pct[-length(pct)] - pct[-1] > 0.1))[1] + 1
# co2 is one past the last point where the change in % of variation is more than
# 0.1% (NA when no consecutive pair of PCs differs by that much).
print(co2) # 11
[1] 11
# Nearest-neighbour / SNN graph computed on the first 10 PCs.
data <- FindNeighbors(object = data, dims = seq_len(10))
Computing nearest neighbor graph
Computing SNN
# Cluster the cells at resolution 0.5.
data <- FindClusters(object = data, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 16108
Number of edges: 432634
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9446
Number of communities: 21
Elapsed time: 1 seconds
# UMAP embedding computed from the first 10 PCs.
data <- RunUMAP(object = data, dims = seq_len(10))
10:49:36 UMAP embedding parameters a = 0.9922 b = 1.112
10:49:36 Read 16108 rows and found 10 numeric columns
10:49:36 Using Annoy for neighbor search, n_neighbors = 30
10:49:36 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
10:49:38 Writing NN index file to temp file /var/folders/vh/xk34gq593k53lzm7wlyg64xw0000gn/T//RtmpIr3N9b/file125ad65ea1dbd
10:49:38 Searching Annoy index using 1 thread, search_k = 3000
10:49:41 Annoy recall = 100%
10:49:42 Commencing smooth kNN distance calibration using 1 thread
10:49:43 Initializing from normalized Laplacian + noise
10:49:46 Commencing optimization for 200 epochs, with 571528 positive edges
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
10:49:54 Optimization finished
# UMAP embedding of the cells.
DimPlot(object = data, reduction = "umap")
# Same kind of plot using the default reduction, with labels drawn on it.
DimPlot(object = data, label = TRUE)
# One violin plot per gene, returned as a list instead of a combined figure.
VlnPlot(
  object = data,
  features = c("ASCL1", "YAP1", "POU2F3", "CALCA", "NEUROD2", "MYC"),
  combine = FALSE
)
[[1]]
[[2]]
[[3]]
[[4]]
[[5]]
[[6]]
library(dplyr)
# Positive markers for every identity class, with min.pct and logfc.threshold
# both set to 0.25.
data.markers <- FindAllMarkers(
  object = data,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25
)
# Within each cluster, keep the 30 markers with the largest avg_log2FC.
data.markers.top <- top_n(group_by(data.markers, cluster), n = 30, wt = avg_log2FC)
# install.packages('plotly')
# Load plot_ly
library(plotly)
# Extract the first three PCA coordinates of every cell from the Seurat object
# (these are PCA embeddings, not tSNE). The Embeddings() accessor is used
# instead of reaching into the @cell.embeddings slot directly.
pca_1 <- Embeddings(object = data, reduction = "pca")[, 1]
pca_2 <- Embeddings(object = data, reduction = "pca")[, 2]
pca_3 <- Embeddings(object = data, reduction = "pca")[, 3]
# Visualize what headings are called so that you can extract them to form a dataframe.
# NOTE: this prints the embedding matrix itself (truncated by getOption("max.print"));
# colnames() on the same call would show only the headings.
Embeddings(object = data, reduction = "pca")
PC_1 PC_2 PC_3 PC_4 PC_5 PC_6
2637-SM-1-GCCAAT_S1:AAAACCTCCCACTCCTCx-DMS454 -5.330006e-01 15.7831573486 -4.99912739 4.431528568 12.344715118 -9.942733e-01
2637-SM-1-GCCAAT_S1:AAAACCTCCACGAAACGx-DMS454 -1.523991e+00 13.8032426834 -5.34413099 5.951599121 12.662260056 1.738698e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTATAGTCGCAx-DMS454 -9.072709e-01 16.6352920532 -5.61871767 4.974217415 13.788206100 -1.106682e+00
2637-SM-1-GCCAAT_S1:AAAACCTCCTTTACCCTx-DMS454 -5.730304e-01 15.6429271698 -5.67932987 5.476871490 13.618507385 -9.325639e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACACAAGGCx-DMS454 -1.492131e-01 14.8050909042 -4.79194593 4.477123737 12.224570274 -2.831221e-01
2637-SM-1-GCCAAT_S1:AAAACTCGACCCTAACCx-DMS454 -3.660548e-01 15.3729009628 -6.26665592 6.448869705 13.469744682 -4.325381e-01
2637-SM-1-GCCAAT_S1:AAAACCTCCCCGCAACTx-DMS454 1.194306e-01 16.8530864716 -6.62010336 7.441605568 14.661459923 -5.750430e-01
2637-SM-1-GCCAAT_S1:AAAACCTCCACCACGCTx-DMS454 -2.065984e+00 15.2632265091 -3.70344591 3.916329861 12.638164520 4.882870e-02
2637-SM-1-GCCAAT_S1:AAAACTCGAATACTCTTx-DMS454 -1.548584e+00 15.0833835602 -5.00290728 3.365054607 12.168499947 -1.200142e+00
2637-SM-1-GCCAAT_S1:AAAAGTCGGAATTCCCAx-DMS454 -1.805957e+00 14.7095775604 -3.76334620 3.833495140 12.354162216 1.960566e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACATCGCAGx-DMS454 -4.979624e-01 15.6097192764 -6.18945026 6.585252285 13.696672440 -3.300405e-01
2637-SM-1-GCCAAT_S1:AAAACTCGATTGTTTACx-DMS454 -2.043035e+00 10.9974489212 -2.54944658 5.212202549 8.051369667 -7.598011e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTAGCAAAGCCx-DMS454 -8.356372e-01 15.5325307846 -5.96117592 6.481206894 13.347260475 -2.213525e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTATAAATAGGx-DMS454 -9.033749e-02 17.6406078339 -7.28156424 8.401016235 15.122898102 -6.395189e-01
2637-SM-1-GCCAAT_S1:AAAACTCGACTACGAGCx-DMS454 -2.914330e+00 -1.6414843798 -3.57089758 -6.497498512 3.247905493 -4.004478e+00
2637-SM-1-GCCAAT_S1:AAAACCTCCAGCAGAACx-DMS454 -7.034260e-01 15.8505172729 -5.98290396 6.183456898 13.805769920 -6.636370e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTAATGGATTAx-DMS454 -5.494278e-01 16.9386711121 -6.75094748 7.617156029 14.676543236 -4.268895e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACGTATTTCx-DMS454 2.638007e+00 13.2642173767 -3.52007580 3.297828913 11.559779167 -9.327608e-01
2637-SM-1-GCCAAT_S1:AAAACTCGAAAACAGGGx-DMS454 -1.063793e+00 14.9242191315 -5.85806179 6.549662113 13.248745918 -7.137018e-02
2637-SM-1-GCCAAT_S1:AAAAGCCTATCCAGTCCx-DMS454 -9.941164e-01 10.5937366486 -5.71169567 3.632258177 10.855877876 -1.727342e+00
PC_7 PC_8 PC_9 PC_10 PC_11 PC_12
2637-SM-1-GCCAAT_S1:AAAACCTCCCACTCCTCx-DMS454 -1.5823048353 -0.1460189521 -2.3531355858 -0.4034147859 1.010060e+00 -1.242242e+00
2637-SM-1-GCCAAT_S1:AAAACCTCCACGAAACGx-DMS454 0.8093938828 4.0745525360 2.8720707893 -2.4640698433 -5.469205e-01 5.758511e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTATAGTCGCAx-DMS454 -1.5789358616 0.0212975442 -1.3608397245 0.3628246188 1.411706e+00 -1.599171e+00
2637-SM-1-GCCAAT_S1:AAAACCTCCTTTACCCTx-DMS454 -1.9826710224 -2.1473784447 0.0107318116 -1.0531636477 -7.399411e-01 8.366549e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACACAAGGCx-DMS454 -0.2154319286 -1.1156141758 3.6134490967 3.6556010246 -5.766839e-01 7.318997e-01
2637-SM-1-GCCAAT_S1:AAAACTCGACCCTAACCx-DMS454 -1.1579729319 -0.7756161690 2.7647163868 -1.7436710596 -8.506994e-01 8.972740e-01
2637-SM-1-GCCAAT_S1:AAAACCTCCCCGCAACTx-DMS454 -1.6920911074 -0.9535923004 3.2410397530 -3.0031275749 -5.564780e-01 4.523649e-01
2637-SM-1-GCCAAT_S1:AAAACCTCCACCACGCTx-DMS454 1.8616019487 6.5816016197 -1.5109727383 -0.0958601087 8.991758e-01 -5.878053e-01
2637-SM-1-GCCAAT_S1:AAAACTCGAATACTCTTx-DMS454 -1.2537747622 -0.6844640374 -2.8660185337 2.1399700642 1.192562e+00 -1.383020e+00
2637-SM-1-GCCAAT_S1:AAAAGTCGGAATTCCCAx-DMS454 1.7403351068 5.2084922791 -0.3655781746 1.2552425861 5.188878e-01 -2.529318e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACATCGCAGx-DMS454 -1.0582358837 -0.7722041011 3.0721859932 -1.5139043331 -7.109895e-01 6.874685e-01
2637-SM-1-GCCAAT_S1:AAAACTCGATTGTTTACx-DMS454 -0.5476834178 0.2590969801 -0.8020487428 0.7853537798 2.881605e-01 7.694458e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTAGCAAAGCCx-DMS454 -0.4385137856 2.2961506844 2.3085794449 -2.7894232273 7.504693e-01 -1.101765e+00
2637-SM-1-GCCAAT_S1:AAAAGCCTATAAATAGGx-DMS454 -1.4882925749 1.3155431747 3.9289636612 -4.6403293610 4.805585e-01 -8.113170e-01
2637-SM-1-GCCAAT_S1:AAAACTCGACTACGAGCx-DMS454 -0.0806917399 0.2646243274 1.1100229025 1.5266330242 -8.217089e-01 1.441019e+00
2637-SM-1-GCCAAT_S1:AAAACCTCCAGCAGAACx-DMS454 -0.8599081635 1.2731432915 1.7189826965 -1.8128361702 7.161572e-01 -9.820016e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTAATGGATTAx-DMS454 -1.0586160421 1.9431242943 2.9348170757 -3.9571712017 4.592456e-01 -8.290518e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACGTATTTCx-DMS454 -1.3473163843 -1.0577111244 -2.0048480034 1.2461444139 -8.738574e-01 1.420979e-02
2637-SM-1-GCCAAT_S1:AAAACTCGAAAACAGGGx-DMS454 -0.0843934864 2.8753423691 2.6546254158 -3.1262457371 1.063335e-01 -3.022336e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTATCCAGTCCx-DMS454 -0.3586787581 1.4112807512 4.0291538239 -2.4531519413 -8.304539e-01 4.661430e-01
PC_13 PC_14 PC_15 PC_16 PC_17 PC_18
2637-SM-1-GCCAAT_S1:AAAACCTCCCACTCCTCx-DMS454 7.294953e-01 2.473359e-01 -7.270732e-02 -2.521090e-01 -4.654368e-01 -2.780792e-02
2637-SM-1-GCCAAT_S1:AAAACCTCCACGAAACGx-DMS454 -6.563130e-01 -6.097929e-01 1.682930e-01 -2.196707e-01 3.552442e-01 2.475258e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTATAGTCGCAx-DMS454 3.170278e-01 6.124064e-01 -3.278669e-01 -3.733237e-01 6.355231e-02 9.698861e-02
2637-SM-1-GCCAAT_S1:AAAACCTCCTTTACCCTx-DMS454 -3.379585e-01 -3.340137e-01 1.280244e-01 2.564772e-01 6.875494e-02 -1.878919e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACACAAGGCx-DMS454 9.090763e-01 -5.115703e-02 2.939613e-01 6.625721e-01 -4.279705e-01 -1.755224e-01
2637-SM-1-GCCAAT_S1:AAAACTCGACCCTAACCx-DMS454 -7.160092e-01 -7.426192e-01 2.459083e-01 -1.848288e-01 3.796791e-01 5.888232e-02
2637-SM-1-GCCAAT_S1:AAAACCTCCCCGCAACTx-DMS454 -6.608389e-01 -5.686569e-01 1.608559e-01 -3.444324e-01 3.996090e-01 5.915011e-02
2637-SM-1-GCCAAT_S1:AAAACCTCCACCACGCTx-DMS454 7.845730e-01 8.550652e-01 -2.445948e-01 5.784501e-01 -6.660061e-01 -1.514170e-01
2637-SM-1-GCCAAT_S1:AAAACTCGAATACTCTTx-DMS454 3.351901e-01 4.786427e-01 -2.260917e-01 -2.824205e-01 6.519974e-02 6.015361e-02
2637-SM-1-GCCAAT_S1:AAAAGTCGGAATTCCCAx-DMS454 8.090588e-01 6.481906e-01 -5.978066e-02 6.044359e-01 -6.449153e-01 -1.525724e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACATCGCAGx-DMS454 -6.048650e-01 -6.486464e-01 2.501285e-01 -2.381237e-01 3.467272e-01 8.273399e-02
2637-SM-1-GCCAAT_S1:AAAACTCGATTGTTTACx-DMS454 -4.757321e-01 3.721789e-01 -2.520603e-01 -1.379211e+00 -7.259366e-01 1.447789e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTAGCAAAGCCx-DMS454 -3.044279e-01 7.736880e-02 -2.484042e-01 -4.632261e-01 4.170283e-01 3.402945e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTATAAATAGGx-DMS454 -6.636350e-01 -2.856323e-01 -9.093310e-02 -8.761009e-01 7.093430e-01 2.251630e-01
2637-SM-1-GCCAAT_S1:AAAACTCGACTACGAGCx-DMS454 -2.682567e+00 4.656917e+00 -2.625844e+00 3.152568e+00 2.479281e+00 -3.161562e+00
2637-SM-1-GCCAAT_S1:AAAACCTCCAGCAGAACx-DMS454 -1.485458e-01 9.704217e-02 -1.959073e-01 -4.035964e-01 2.870651e-01 2.240526e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTAATGGATTAx-DMS454 -5.323181e-01 -1.454448e-01 -1.211627e-01 -6.329373e-01 5.164869e-01 2.341870e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACGTATTTCx-DMS454 -7.442645e-01 1.214221e+00 -1.654689e+00 -2.010369e+00 -5.546962e-01 1.628046e+00
2637-SM-1-GCCAAT_S1:AAAACTCGAAAACAGGGx-DMS454 -5.057213e-01 -3.069415e-01 -4.943693e-02 -4.428465e-01 4.655954e-01 3.297435e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTATCCAGTCCx-DMS454 -1.474357e+00 8.051838e-01 -6.313702e-01 6.749286e-01 1.101827e+00 -8.776268e-01
PC_19 PC_20 PC_21 PC_22 PC_23 PC_24
2637-SM-1-GCCAAT_S1:AAAACCTCCCACTCCTCx-DMS454 -3.361792e-02 1.447751e-01 1.208883e-01 -2.611945e-01 -7.184841e-02 7.438824e-02
2637-SM-1-GCCAAT_S1:AAAACCTCCACGAAACGx-DMS454 1.709947e-01 1.347327e-01 4.618974e-02 -1.756053e-01 -1.926692e-01 3.196495e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTATAGTCGCAx-DMS454 9.360180e-02 -6.518707e-02 -2.673428e-01 -3.187393e-01 9.336621e-02 1.378541e-01
2637-SM-1-GCCAAT_S1:AAAACCTCCTTTACCCTx-DMS454 -1.850931e-01 -5.897813e-02 9.548500e-02 1.927239e-01 -6.593563e-02 -1.193439e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACACAAGGCx-DMS454 -2.875430e-02 -4.964193e-02 1.782237e-02 1.758711e-01 1.329684e-01 -1.548852e-01
2637-SM-1-GCCAAT_S1:AAAACTCGACCCTAACCx-DMS454 -3.683019e-02 -1.236001e-02 9.205112e-02 -4.512759e-02 -1.551574e-01 1.775038e-01
2637-SM-1-GCCAAT_S1:AAAACCTCCCCGCAACTx-DMS454 5.506851e-03 4.424182e-02 -5.439821e-04 -1.358151e-01 -1.185811e-01 3.017917e-01
2637-SM-1-GCCAAT_S1:AAAACCTCCACCACGCTx-DMS454 -1.949050e-01 -1.975647e-02 5.480625e-02 3.971961e-01 3.422881e-01 -5.079930e-01
2637-SM-1-GCCAAT_S1:AAAACTCGAATACTCTTx-DMS454 9.768248e-02 -7.000690e-03 -2.165902e-01 -2.868348e-01 5.089015e-02 9.245434e-02
2637-SM-1-GCCAAT_S1:AAAAGTCGGAATTCCCAx-DMS454 -5.477002e-02 -5.246362e-02 -6.877988e-04 2.764813e-01 2.532178e-01 -4.490738e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACATCGCAGx-DMS454 4.132291e-02 3.455275e-02 3.084295e-02 -1.738249e-01 -5.805039e-02 3.410251e-01
2637-SM-1-GCCAAT_S1:AAAACTCGATTGTTTACx-DMS454 -1.427839e+00 -1.418182e+00 8.011951e-02 -5.454413e-01 6.865828e-01 4.194458e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTAGCAAAGCCx-DMS454 2.898117e-01 2.011478e-01 1.234212e-03 -1.679437e-01 -4.029469e-01 7.488415e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTATAAATAGGx-DMS454 2.666845e-01 7.388177e-02 -1.422659e-01 -6.168332e-01 -1.795759e-01 6.165286e-01
2637-SM-1-GCCAAT_S1:AAAACTCGACTACGAGCx-DMS454 2.402028e+00 5.101840e-01 -5.245062e+00 -4.026399e+00 -4.074595e-01 3.473213e+00
2637-SM-1-GCCAAT_S1:AAAACCTCCAGCAGAACx-DMS454 2.337724e-01 1.168464e-01 -6.824884e-02 -1.661357e-01 -2.706376e-01 5.776265e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTAATGGATTAx-DMS454 2.044680e-01 2.041140e-02 -1.424628e-01 -4.758938e-01 -2.169030e-01 4.994684e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACGTATTTCx-DMS454 -2.269364e+00 -2.504901e+00 4.193557e+00 -1.198555e+00 -7.958659e-01 -9.608542e-01
2637-SM-1-GCCAAT_S1:AAAACTCGAAAACAGGGx-DMS454 2.418210e-01 1.693951e-01 6.386673e-02 -2.473812e-01 -3.810302e-01 5.966483e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTATCCAGTCCx-DMS454 5.792484e-01 8.776578e-01 -5.925104e-01 -5.504613e-01 -1.880634e-01 1.186856e+00
PC_25 PC_26 PC_27 PC_28 PC_29 PC_30
2637-SM-1-GCCAAT_S1:AAAACCTCCCACTCCTCx-DMS454 -3.977298e-01 3.918801e-01 -1.564477e-01 -1.307487e-01 -5.061774e-03 -1.912944e-01
2637-SM-1-GCCAAT_S1:AAAACCTCCACGAAACGx-DMS454 8.510780e-02 -5.265933e-03 -3.134925e-04 1.580443e-01 -4.489630e-02 6.675449e-02
2637-SM-1-GCCAAT_S1:AAAAGCCTATAGTCGCAx-DMS454 1.192606e-01 5.232215e-02 4.326988e-02 -5.141614e-03 1.449553e-02 -6.223439e-02
2637-SM-1-GCCAAT_S1:AAAACCTCCTTTACCCTx-DMS454 -9.078123e-02 1.396326e-01 3.425571e-02 9.323268e-03 -8.659253e-02 -1.982041e-02
2637-SM-1-GCCAAT_S1:AAAAGCCTACACAAGGCx-DMS454 -3.929860e-02 -2.008294e-01 -2.273200e-01 -2.275601e-01 2.155262e-01 4.303970e-03
2637-SM-1-GCCAAT_S1:AAAACTCGACCCTAACCx-DMS454 5.949186e-02 -2.185955e-02 8.798843e-02 1.338937e-01 -1.398457e-01 1.637987e-01
2637-SM-1-GCCAAT_S1:AAAACCTCCCCGCAACTx-DMS454 7.151665e-03 -2.824771e-03 2.423345e-03 1.413002e-01 -1.733714e-01 1.577226e-01
2637-SM-1-GCCAAT_S1:AAAACCTCCACCACGCTx-DMS454 -8.129880e-02 3.419898e-01 -1.093046e-01 -5.941041e-01 3.041223e-01 -1.312581e-01
2637-SM-1-GCCAAT_S1:AAAACTCGAATACTCTTx-DMS454 1.768360e-01 5.848901e-03 -7.766823e-02 1.253037e-04 -1.404598e-01 -6.942572e-02
2637-SM-1-GCCAAT_S1:AAAAGTCGGAATTCCCAx-DMS454 -6.737050e-02 1.229401e-01 -1.349613e-01 -4.096593e-01 2.963899e-01 -1.458177e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACATCGCAGx-DMS454 8.214568e-02 -3.866295e-02 -1.231640e-02 2.106691e-01 -1.340678e-01 1.600724e-01
2637-SM-1-GCCAAT_S1:AAAACTCGATTGTTTACx-DMS454 4.413525e-01 -2.363106e+00 1.869014e+00 2.721277e+00 4.044595e-01 -3.122877e+00
2637-SM-1-GCCAAT_S1:AAAAGCCTAGCAAAGCCx-DMS454 1.774153e-01 -2.495440e-01 7.436457e-02 3.042828e-01 -3.529809e-01 1.352746e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTATAAATAGGx-DMS454 2.887139e-01 -9.830377e-02 1.876618e-02 2.383744e-01 -2.483719e-01 1.343798e-01
2637-SM-1-GCCAAT_S1:AAAACTCGACTACGAGCx-DMS454 2.910516e+00 1.102564e+00 -1.725626e+00 1.483574e+00 -2.135145e+00 -3.559217e+00
2637-SM-1-GCCAAT_S1:AAAACCTCCAGCAGAACx-DMS454 1.618010e-01 -2.171500e-01 2.331492e-02 2.023569e-01 -2.544110e-01 1.096027e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTAATGGATTAx-DMS454 1.489263e-01 -8.740588e-02 -1.153311e-02 1.832761e-01 -2.104063e-01 7.532500e-02
2637-SM-1-GCCAAT_S1:AAAAGCCTACGTATTTCx-DMS454 2.721646e+00 2.827084e+00 8.362729e-01 -8.361251e-02 -5.288748e-01 -9.289427e-02
2637-SM-1-GCCAAT_S1:AAAACTCGAAAACAGGGx-DMS454 8.702165e-02 -1.425820e-01 5.902813e-02 2.709337e-01 -2.524801e-01 1.303372e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTATCCAGTCCx-DMS454 8.782906e-01 -4.602059e-02 -6.118370e-01 4.619919e-01 -4.129651e-01 -8.820707e-01
PC_31 PC_32 PC_33 PC_34 PC_35 PC_36
2637-SM-1-GCCAAT_S1:AAAACCTCCCACTCCTCx-DMS454 7.178337e-02 3.468694e-01 -7.633416e-03 2.547109e-01 -1.607669e-01 -7.588624e-02
2637-SM-1-GCCAAT_S1:AAAACCTCCACGAAACGx-DMS454 1.078478e-01 6.567536e-02 -7.564043e-02 -2.536303e-03 -5.242444e-02 5.642208e-02
2637-SM-1-GCCAAT_S1:AAAAGCCTATAGTCGCAx-DMS454 -4.433376e-02 -1.821055e-02 -1.274771e-02 -2.635548e-02 1.740603e-02 -7.014634e-02
2637-SM-1-GCCAAT_S1:AAAACCTCCTTTACCCTx-DMS454 4.249171e-03 -8.908787e-02 -4.482022e-02 1.245225e-01 -5.678309e-02 1.047219e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACACAAGGCx-DMS454 8.471525e-02 1.038909e-01 2.913630e-01 -1.514539e-02 1.846113e-02 2.000261e-01
2637-SM-1-GCCAAT_S1:AAAACTCGACCCTAACCx-DMS454 -4.943119e-02 1.029804e-01 4.040855e-02 -5.410886e-02 -1.500213e-02 3.431663e-02
2637-SM-1-GCCAAT_S1:AAAACCTCCCCGCAACTx-DMS454 -8.906568e-02 4.923093e-02 -8.093462e-02 -1.622516e-01 -8.921239e-02 3.743656e-03
2637-SM-1-GCCAAT_S1:AAAACCTCCACCACGCTx-DMS454 3.415898e-01 -6.192387e-02 2.863931e-01 4.880216e-01 -1.245923e-01 6.956635e-02
2637-SM-1-GCCAAT_S1:AAAACTCGAATACTCTTx-DMS454 -1.635870e-01 -3.102930e-02 -1.014364e-02 -1.447027e-01 4.717002e-02 -4.857706e-02
2637-SM-1-GCCAAT_S1:AAAAGTCGGAATTCCCAx-DMS454 3.359815e-01 1.729917e-03 2.394908e-01 2.381256e-01 -3.874166e-02 1.233448e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACATCGCAGx-DMS454 -6.318924e-02 3.269618e-02 1.766223e-03 -1.607636e-01 -2.637605e-03 -2.969957e-02
2637-SM-1-GCCAAT_S1:AAAACTCGATTGTTTACx-DMS454 -3.020128e+00 -1.314013e+00 -6.959263e-02 4.193771e-01 3.568034e+00 -6.496847e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTAGCAAAGCCx-DMS454 -1.641549e-01 1.447559e-01 -9.986452e-02 -1.626322e-01 3.822305e-02 -6.053107e-02
2637-SM-1-GCCAAT_S1:AAAAGCCTATAAATAGGx-DMS454 -2.412210e-01 9.583429e-02 -1.934880e-01 -3.834872e-01 -4.252978e-02 -9.706888e-02
2637-SM-1-GCCAAT_S1:AAAACTCGACTACGAGCx-DMS454 1.421064e+00 -2.868943e+00 -4.081529e+00 8.878630e+00 -1.958373e+00 -3.553493e+00
2637-SM-1-GCCAAT_S1:AAAACCTCCAGCAGAACx-DMS454 -1.385888e-01 8.313509e-02 -7.316940e-02 -1.311843e-01 -7.035925e-03 -7.067559e-02
2637-SM-1-GCCAAT_S1:AAAAGCCTAATGGATTAx-DMS454 -2.068422e-01 6.200801e-02 -3.333154e-02 -2.097748e-01 -7.080219e-03 -1.563248e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACGTATTTCx-DMS454 -4.385982e-01 3.242601e-01 1.186218e+00 1.276476e+00 1.347943e+00 6.258379e-02
2637-SM-1-GCCAAT_S1:AAAACTCGAAAACAGGGx-DMS454 -1.014097e-01 1.664639e-01 -1.123762e-01 -1.541795e-01 4.660483e-02 -6.295755e-02
2637-SM-1-GCCAAT_S1:AAAAGCCTATCCAGTCCx-DMS454 6.077069e-01 -8.327677e-01 -1.265472e+00 2.488956e+00 -5.756503e-01 -7.218584e-01
PC_37 PC_38 PC_39 PC_40 PC_41 PC_42
2637-SM-1-GCCAAT_S1:AAAACCTCCCACTCCTCx-DMS454 -1.689664e-02 -1.972325e-01 -1.062634e-01 -2.176611e-01 9.353866e-02 -4.498015e-02
2637-SM-1-GCCAAT_S1:AAAACCTCCACGAAACGx-DMS454 -1.305201e-02 1.118238e-01 6.956173e-02 7.286144e-02 -4.204969e-02 -1.045563e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTATAGTCGCAx-DMS454 7.166870e-02 -1.011380e-02 1.810843e-02 -2.243755e-01 4.089223e-02 -9.096775e-02
2637-SM-1-GCCAAT_S1:AAAACCTCCTTTACCCTx-DMS454 -7.499694e-02 -5.142114e-02 3.235722e-02 2.580668e-02 1.054653e-02 -1.762611e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACACAAGGCx-DMS454 -2.205022e-01 -2.029201e-01 -2.154740e-02 -9.118601e-02 1.999458e-01 -2.467799e-01
2637-SM-1-GCCAAT_S1:AAAACTCGACCCTAACCx-DMS454 1.521888e-02 5.857362e-02 1.163281e-01 5.914750e-02 -9.708164e-03 -1.818074e-01
2637-SM-1-GCCAAT_S1:AAAACCTCCCCGCAACTx-DMS454 7.991549e-02 6.939331e-02 4.535251e-02 6.201423e-02 -7.918986e-02 -2.174866e-01
2637-SM-1-GCCAAT_S1:AAAACCTCCACCACGCTx-DMS454 -1.575505e-01 -1.614078e-01 -1.657474e-01 1.251460e-01 1.143411e-01 2.847401e-01
2637-SM-1-GCCAAT_S1:AAAACTCGAATACTCTTx-DMS454 3.849934e-02 -6.665115e-02 2.792093e-02 -4.471807e-02 -8.694945e-02 -3.725541e-02
2637-SM-1-GCCAAT_S1:AAAAGTCGGAATTCCCAx-DMS454 -1.841896e-01 -1.848598e-01 -8.723698e-02 1.321984e-01 6.782246e-02 2.036870e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACATCGCAGx-DMS454 5.889064e-02 1.073408e-01 8.057758e-02 2.929789e-03 -1.008963e-01 -2.270185e-01
2637-SM-1-GCCAAT_S1:AAAACTCGATTGTTTACx-DMS454 1.613025e+00 7.014663e+00 -9.202596e+00 2.949777e+00 -9.664982e+00 9.640714e+00
2637-SM-1-GCCAAT_S1:AAAAGCCTAGCAAAGCCx-DMS454 8.275391e-02 1.372719e-01 4.264903e-02 -9.388711e-02 -2.176736e-02 -2.024437e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTATAAATAGGx-DMS454 1.958274e-01 2.096978e-01 -1.173926e-01 3.414507e-02 -8.306266e-02 -2.904435e-01
2637-SM-1-GCCAAT_S1:AAAACTCGACTACGAGCx-DMS454 2.831384e+00 2.893290e+00 -4.650798e-01 -2.104443e+00 1.311148e+00 -3.450706e+00
2637-SM-1-GCCAAT_S1:AAAACCTCCAGCAGAACx-DMS454 9.760256e-02 1.212680e-01 4.582188e-02 -6.033961e-02 -6.719584e-02 -1.621660e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTAATGGATTAx-DMS454 1.486137e-01 1.128113e-01 4.917265e-02 8.891997e-03 -4.511118e-02 -2.889845e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACGTATTTCx-DMS454 -1.765312e-01 7.473551e-01 8.582608e-01 -9.052635e-01 -1.210818e-02 -1.176940e-01
2637-SM-1-GCCAAT_S1:AAAACTCGAAAACAGGGx-DMS454 7.303432e-02 1.488307e-01 5.052372e-02 -6.745781e-03 -7.876925e-02 -1.919898e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTATCCAGTCCx-DMS454 5.032282e-01 9.111993e-01 -4.038033e-01 -7.601706e-01 7.583009e-02 -9.282212e-01
PC_43 PC_44 PC_45 PC_46 PC_47 PC_48
2637-SM-1-GCCAAT_S1:AAAACCTCCCACTCCTCx-DMS454 -1.846406e-01 1.547253e-01 2.297880e-01 -1.943068e-02 -2.792307e-01 -5.937706e-01
2637-SM-1-GCCAAT_S1:AAAACCTCCACGAAACGx-DMS454 -5.772941e-02 -4.995238e-02 2.229435e-01 -1.269658e-02 -1.112332e-01 -6.078280e-02
2637-SM-1-GCCAAT_S1:AAAAGCCTATAGTCGCAx-DMS454 5.303998e-02 1.857944e-02 1.150127e-01 -2.350276e-02 -9.493674e-02 -1.608055e-01
2637-SM-1-GCCAAT_S1:AAAACCTCCTTTACCCTx-DMS454 -1.631701e-01 3.260004e-02 6.515490e-02 -1.316489e-03 -8.854987e-02 -9.888419e-02
2637-SM-1-GCCAAT_S1:AAAAGCCTACACAAGGCx-DMS454 -1.484205e-01 -1.083003e-01 -3.071407e-01 1.440026e-01 -7.384166e-02 2.190104e-01
2637-SM-1-GCCAAT_S1:AAAACTCGACCCTAACCx-DMS454 -7.780109e-02 -4.700582e-02 3.027983e-01 2.758679e-02 -4.392372e-02 1.004395e-02
2637-SM-1-GCCAAT_S1:AAAACCTCCCCGCAACTx-DMS454 1.618318e-01 -1.340839e-01 3.122295e-01 1.081941e-03 -5.903462e-02 -6.968346e-02
2637-SM-1-GCCAAT_S1:AAAACCTCCACCACGCTx-DMS454 -5.004801e-01 9.769507e-02 -8.189958e-01 -1.185895e-01 -1.391004e-02 1.722746e-01
2637-SM-1-GCCAAT_S1:AAAACTCGAATACTCTTx-DMS454 4.028716e-02 -5.255036e-02 6.954242e-02 5.470438e-02 3.286935e-02 -1.326982e-01
2637-SM-1-GCCAAT_S1:AAAAGTCGGAATTCCCAx-DMS454 -2.477535e-01 6.182216e-02 -4.947552e-01 -6.486649e-02 -1.633124e-01 4.576390e-02
2637-SM-1-GCCAAT_S1:AAAAGCCTACATCGCAGx-DMS454 -1.846112e-02 -5.164485e-02 2.833284e-01 5.407869e-02 -8.365037e-02 -9.971757e-02
2637-SM-1-GCCAAT_S1:AAAACTCGATTGTTTACx-DMS454 -2.116442e+00 -3.688505e+00 -3.158458e+00 1.017042e+00 -3.195291e-01 -1.171085e+00
2637-SM-1-GCCAAT_S1:AAAAGCCTAGCAAAGCCx-DMS454 6.187797e-02 -1.471764e-01 3.143212e-01 7.808868e-02 -9.036778e-02 -1.025820e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTATAAATAGGx-DMS454 1.241250e-01 -1.913793e-01 6.055672e-01 1.857070e-01 1.007440e-01 -1.963456e-01
2637-SM-1-GCCAAT_S1:AAAACTCGACTACGAGCx-DMS454 -3.849472e+00 -8.260834e+00 8.411742e+00 -5.707857e+00 1.167274e+01 7.241825e+00
2637-SM-1-GCCAAT_S1:AAAACCTCCAGCAGAACx-DMS454 -3.108031e-02 -1.104532e-01 2.975808e-01 9.492937e-02 -3.928870e-02 -1.307017e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTAATGGATTAx-DMS454 2.075522e-02 -3.319533e-02 4.071069e-01 1.319857e-01 8.243781e-02 -1.094437e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACGTATTTCx-DMS454 -7.953045e-01 1.077939e+00 -5.254813e-01 -9.795715e-01 -7.058629e-01 7.108837e-01
2637-SM-1-GCCAAT_S1:AAAACTCGAAAACAGGGx-DMS454 5.553305e-02 -7.521629e-02 3.535968e-01 8.646125e-02 -1.496579e-01 -8.141145e-02
2637-SM-1-GCCAAT_S1:AAAAGCCTATCCAGTCCx-DMS454 -1.188960e+00 -2.053133e+00 2.597486e+00 -8.228294e-01 3.381056e+00 2.191938e+00
PC_49 PC_50
2637-SM-1-GCCAAT_S1:AAAACCTCCCACTCCTCx-DMS454 -7.585093e-02 -8.387165e-02
2637-SM-1-GCCAAT_S1:AAAACCTCCACGAAACGx-DMS454 6.375924e-02 -8.397289e-02
2637-SM-1-GCCAAT_S1:AAAAGCCTATAGTCGCAx-DMS454 6.255343e-02 -1.203182e-01
2637-SM-1-GCCAAT_S1:AAAACCTCCTTTACCCTx-DMS454 2.751223e-02 9.196898e-02
2637-SM-1-GCCAAT_S1:AAAAGCCTACACAAGGCx-DMS454 -2.439860e-01 -2.369405e-01
2637-SM-1-GCCAAT_S1:AAAACTCGACCCTAACCx-DMS454 7.985099e-02 -1.775235e-01
2637-SM-1-GCCAAT_S1:AAAACCTCCCCGCAACTx-DMS454 9.817056e-02 -1.786353e-01
2637-SM-1-GCCAAT_S1:AAAACCTCCACCACGCTx-DMS454 -4.951902e-01 3.509490e-02
2637-SM-1-GCCAAT_S1:AAAACTCGAATACTCTTx-DMS454 -4.071366e-02 -1.129273e-02
2637-SM-1-GCCAAT_S1:AAAAGTCGGAATTCCCAx-DMS454 -3.041050e-01 5.165340e-02
2637-SM-1-GCCAAT_S1:AAAAGCCTACATCGCAGx-DMS454 9.131834e-02 -1.183159e-01
2637-SM-1-GCCAAT_S1:AAAACTCGATTGTTTACx-DMS454 3.167262e-01 -4.674257e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTAGCAAAGCCx-DMS454 1.243121e-01 -1.805104e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTATAAATAGGx-DMS454 1.355283e-01 -4.413607e-01
2637-SM-1-GCCAAT_S1:AAAACTCGACTACGAGCx-DMS454 -2.490258e+00 2.252123e+00
2637-SM-1-GCCAAT_S1:AAAACCTCCAGCAGAACx-DMS454 1.126873e-01 -1.382249e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTAATGGATTAx-DMS454 6.485464e-02 -3.004072e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTACGTATTTCx-DMS454 -4.851449e-01 4.000901e-02
2637-SM-1-GCCAAT_S1:AAAACTCGAAAACAGGGx-DMS454 1.172132e-01 -1.553439e-01
2637-SM-1-GCCAAT_S1:AAAAGCCTATCCAGTCCx-DMS454 -7.841925e-01 4.542918e-01
[ reached getOption("max.print") -- omitted 16088 rows ]
# Data frame holding the first three PCs plus the `cline` annotation per cell.
plot.data <- FetchData(
  object = data,
  vars = c("PC_1", "PC_2", "PC_3", "cline")
)
# Row names become the hover label for each point.
plot.data$label <- rownames(plot.data)
# Interactive 3D scatter of PC_1 / PC_2 / PC_3, coloured by `cline`.
plot_ly(
  data = plot.data,
  x = ~PC_1,
  y = ~PC_2,
  z = ~PC_3,
  color = ~cline,
  colors = c(
    "lightseagreen", "green", "red", "orange1", "royalblue1",
    "lightcyan3", "peachpuff3", "darkorchid1", "turquoise", "darkmagenta"
  ),
  type = "scatter3d",
  mode = "markers",
  marker = list(size = 5, width = 2), # controls size of points
  text = ~label, # the extra label column created above, used as the cell ID
  hoverinfo = "text" # hovering over a point shows only that label
)
# Gene loadings for the first three PCs.
VizDimLoadings(object = data, dims = seq_len(3), reduction = "pca")
# Run ProjectDim on the PCA reduction; the returned object replaces `data`.
data <- ProjectDim(object = data, reduction = "pca")
PC_ 1
Positive: NR2F1, COL2A1, ZIC2, NELL1, FABP7, DLX6-AS1, MAP1B, CALB1, RIPPLY3, CDKN1A
IGFBP5, RALYL, SPOCK1, MYCN, SMOC1, PCDH9, POSTN, S100A6, MS4A8, FAM60A
Negative: CAV1, RPS27A, SGK1, MT-CO2, CTGF, SERPINH1, LTBP4, LPHN2, MT-ND1, CLDN6
EIF5A, MT-ND4, MTATP6P1, MYC, HMGA1, CAV2, HMGA2, MT-CO1, MID1, RPL37A
PC_ 2
Positive: ASCL1, DSP, SCNN1A, CALCA, KRT8, UCP2, RAB3B, SEC11C, MPP1-1, HMGB3-1
TMEM176B, PCSK2, UGDH, SST, TFF3, AGT, SCN3A, OR51E2, ABHD2, BCAP31-1
Negative: TMSB4X, PTMS, TUBB, ENC1, CKB, FSCN1, MAP1B, TCF4, CSNK1E, MYC
TMSB4XP8, NUCKS1, ACTG1, DCX, EEF1A2, TUBA1A, RPL14, SGK1, RPS28, CAV1
PC_ 3
Positive: CAV1, SGK1, CTGF, SERPINH1, NR2F1, CLDN6, LPHN2, CAV2, MYL12B, CRIP2
PXDN, S100A11, MYL12A, PTRF, COL2A1, ANXA1, MMP10, RDH10, MID1, ALDH1A1
Negative: INSM1, BASP1, HEPACAM2, FAM91A1, MEST, CDKN2C, DLK1, MYC, PCSK2, NHLH2
RPL37, DPYSL2, COTL1, RAI14, NKX2-2, TSPAN13, SRD5A1, CDH12, SLFN11, ELN
PC_ 4
Positive: CAV1, SGK1, PIM1, ENC1, CTGF, PCSK2, MAP2, BASP1, KIF1A, HEPACAM2
LPHN2, CLDN6, SERPINH1, FAM91A1, VMP1, TENM3, ARID1A, FNIP2, EIF5A, MID1
Negative: SOX11, SNTB1, RPS28, MYC, RPL18, TPT1, NPW, RPL5, RPL21, ASS1
SUSD2, BEX1, RPL23, RTN1, RPS24, LDHB, RPL24, HMGB1, RPL19, RPLP1
PC_ 5
Positive: DSP, PCSK2, MPP1-1, TDRD1, TMEM176A, G6PD-1, AMBP, MIAT, POU4F2, CLDN18
GUCY2C, PLCG2, ONECUT2, MIB2, AKR1B10, OR51E2, MTSS1, CLU, UGDH, CMIP
Negative: PCSK1, SST, BCAT1, CCND2, FAM178B, PEG10, SPINK1, RAPGEF5, FAM91A1, MEST
CYP1A1, TAGLN2, UCP2, DLK1, EGFL7, EPCAM, KLK11, NHLH2, RAI14, GADD45A
We will now apply archetype analysis to the single-cell data. Since we’ve reduced the data to its top PCs (the top 11 are kept in `x_pca` below), we have gotten rid of a lot of the noise and still captured a large proportion of the variance.
Fit to k = 2 to 8 to find the best number of archetypes. We will look at the variance explained by each archetype as well as the t-ratios. To choose a final number to move forward with the analysis, we will run a randomization test to get a p-value for each number of archetypes (t-ratio test).
library(ParetoTI)
Loading required package: data.table
data.table 1.14.2 using 1 threads (see ?getDTthreads). Latest news: r-datatable.com
**********
This installation of data.table has not detected OpenMP support. It should still work but in single-threaded mode.
This is a Mac. Please read https://mac.r-project.org/openmp/. Please engage with Apple and ask them for support. Check r-datatable.com for updates, and our Mac instructions here: https://github.com/Rdatatable/data.table/wiki/Installation. After several years of many reports of installation problems on Mac, it's time to gingerly point out that there have been no similar problems on Windows or Linux.
**********
Attaching package: ‘data.table’
The following object is masked from ‘package:SummarizedExperiment’:
shift
The following object is masked from ‘package:GenomicRanges’:
shift
The following object is masked from ‘package:IRanges’:
shift
The following objects are masked from ‘package:S4Vectors’:
first, second
Loading required package: lpSolve
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
Warning: replacing previous import ‘ggplot2::last_plot’ by ‘plotly::last_plot’ when loading ‘ParetoTI’
library(cowplot)
library(ggplot2)
library(RColorBrewer)
library(reshape2)
Attaching package: ‘reshape2’
The following objects are masked from ‘package:data.table’:
dcast, melt
library(factoextra)
Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(ggfortify)
library(cluster)
# install.packages("data.table")
##################################
# load(file="../../out/cell-lines/ParetoTI/X_magic_for_ParetoTI.Robj")
# Scaled expression matrix stored in the RNA assay.
x <- data[["RNA"]]@scale.data
# PCA embedding read from CSV and transposed so that PCs are rows.
x_pca <- t(
  read.csv(
    "../../out/cell-lines/ParetoTI/pca_embedding.csv",
    header = TRUE,
    row.names = 1
  )
)[1:11, ] # keep only top 11 PCs
# Projected PCA feature loadings, converted to a plain matrix.
loadings <- as.matrix(
  read.csv(
    "../../out/cell-lines/ParetoTI/pca_feature_loadings_projected.csv",
    header = TRUE,
    row.names = 1
  )
)
# Fit polytopes with k = 2..8 archetypes to the first 8 PCs only.
# Each fit is bootstrapped (200 resamples, 75% of the data per resample) so the
# variability of the vertex positions can be inspected afterwards.
# TRUE is spelled out because T is an ordinary, reassignable variable.
arc_ks_8 <- k_fit_pch(
  x_pca[1:8, ],
  ks = 2:8,
  check_installed = TRUE,
  bootstrap = TRUE,
  bootstrap_N = 200,
  maxiter = 1000,
  bootstrap_type = "m",
  seed = 2543,
  volume_ratio = "t_ratio", # set to "none" if too slow
  delta = 0,
  conv_crit = 1e-04,
  order_type = "align",
  sample_prop = 0.75
)
# Diagnostics for the 8-PC fits (arc_ks_8), one plot per metric.
# Variance explained by a polytope with each k (cumulative).
plot_arc_var(arc_ks_8, type = "varexpl", point_size = 2, line_size = 1.5) +
  theme_bw()
# Variance explained on top of the k - 1 model.
plot_arc_var(arc_ks_8, type = "res_varexpl", point_size = 2, line_size = 1.5) +
  theme_bw()
# Variability of the vertex positions.
plot_arc_var(arc_ks_8, type = "total_var", point_size = 2, line_size = 1.5) +
  ylab("Mean variance in position of vertices") +
  theme_bw()
# t-ratio for each k.
plot_arc_var(arc_ks_8, type = "t_ratio", point_size = 2, line_size = 1.5) +
  theme_bw()
Warning: Removed 1 row(s) containing missing values (geom_path).
Warning: Removed 1 rows containing missing values (geom_point).
# Same k = 2..8 sweep as above, but on every row kept in x_pca.
# Each fit is bootstrapped (200 resamples, 75% of the data per resample).
# TRUE is spelled out because T is an ordinary, reassignable variable.
arc_ks <- k_fit_pch(
  x_pca,
  ks = 2:8,
  check_installed = TRUE,
  bootstrap = TRUE,
  bootstrap_N = 200,
  maxiter = 1000,
  bootstrap_type = "m",
  seed = 2543,
  volume_ratio = "t_ratio", # set to "none" if too slow
  delta = 0,
  conv_crit = 1e-04,
  order_type = "align",
  sample_prop = 0.75
)
Warning: could not honor request to load desired versions of Python; '/Users/smgroves/Documents/anaconda3/envs/reticulate_PCHA/bin/python3.7' was loaded instead (see reticulate::py_config() for more information)
# Diagnostics for the fits on all of x_pca (arc_ks), one plot per metric.
# Variance explained by a polytope with each k (cumulative).
plot_arc_var(arc_ks, type = "varexpl", point_size = 2, line_size = 1.5) +
  theme_bw()
# Variance explained on top of the k - 1 model.
plot_arc_var(arc_ks, type = "res_varexpl", point_size = 2, line_size = 1.5) +
  theme_bw()
# Variability of the vertex positions.
plot_arc_var(arc_ks, type = "total_var", point_size = 2, line_size = 1.5) +
  ylab("Mean variance in position of vertices") +
  theme_bw()
# t-ratio for each k.
plot_arc_var(arc_ks, type = "t_ratio", point_size = 2, line_size = 1.5) +
  theme_bw()
Warning: Removed 1 row(s) containing missing values (geom_path).
Warning: Removed 1 rows containing missing values (geom_point).
The plots above show a few things:
1. The 7th and 8th archetypes do not add much variance explained (k = 6 explains over 80% of the variance, whereas 7 and 8 explain ~8% combined).
2. When we look at the variance explained on top of the k-1 model, the 6th archetype clearly explains a large proportion of the variance (more than the 5th archetype, actually).
3. The 7th and 8th archetypes have higher variance in the position of the vertex, suggesting these are less robust than the first 6, which have variance close to 0.
4. The t-ratio shows an increase for 6 archetypes from 5, suggesting it fits the geometry of the data better than 5 archetypes.

We will follow this up with t-ratio tests to confirm.
# Number of archetypes to test.
i <- 6
# Reference fit with i archetypes on the PCA-reduced data.
arc <- fit_pch(x_pca, noc = i, delta = 0, conv_crit = 1e-04, maxiter = 500)
# Timer covers the randomisation test and the PDF export below.
start <- Sys.time()
# Randomisation test with 1000 randomised datasets; gives empirical p-values
# for the observed fit statistics (including the t-ratio).
pch_rand <- randomise_fit_pch(
  x_pca,
  arc_data = arc,
  n_rand = 1000,
  replace = FALSE,
  bootstrap_N = NA,
  volume_ratio = "t_ratio",
  maxiter = 500,
  delta = 0,
  conv_crit = 1e-4,
  type = "m",
  clust_options = list(cores = 3)
)
# use type m to run on a single machine or cloud
# type = "m", clust_options = list(cores = 3))
# use clustermq (type cmq) to run as jobs on a computing cluster (higher parallelisation)
# type = "cmq", clust_options = list(njobs = 10))
pdf(sprintf('../../figures/ParetoTI/%s_t-ratio_test.pdf', i))
# Print explicitly: without print() the plot is only drawn by top-level
# auto-printing, so the PDF comes out empty under source() or Rscript.
print(plot.r_pch_fit(pch_rand, type = c("t_ratio"), nudge_y = 5))
dev.off()
# This analysis took:
print(Sys.time() - start)
# k var_name var_obs p_value
# 1: 3 varexpl 0.4309729 0.001
# 2: 3 t_ratio 0.5023466 0.001
# 3: 3 total_var NA NaN
# ---
# k var_name var_obs p_value
# 1: 4 varexpl 0.5727041 0.001
# 2: 4 t_ratio 0.2134559 0.003
# 3: 4 total_var NA NaN
# ---
# k var_name var_obs p_value
# 1: 5 varexpl 0.6979477 0.001
# 2: 5 t_ratio 0.1492318 0.001
# 3: 5 total_var NA NaN
# ---
# k var_name var_obs p_value
# 1: 6 varexpl 0.8313096 0.001
# 2: 6 t_ratio 0.2436056 0.001
# 3: 6 total_var NA NaN
We’ll start by fitting 6 archetypes and finding the enriched genes and gene sets for each.
Idents(object = data) <- data@meta.data$cline #add cell line labels
# Nine "Set1" colours plus gray.
cols <- c(brewer.pal(9, "Set1"), "gray")

# Dimensions 1-2 of x_pca with the fitted archetypes, labelled by cell line.
plot_arc(arc_data = arc, data = x_pca,
         which_dimensions = 1:2, colors = cols,
         data_lab = as.character(Idents(data))) + theme_bw()

# Dimensions 1-3 of x_pca with the fitted archetypes, labelled by cell line.
p_pca <- plot_arc(arc_data = arc, data = x_pca,
                  which_dimensions = 1:3, line_size = 1.5,
                  colors = cols,
                  data_lab = as.character(Idents(data)),
                  text_size = 60, data_size = 2)
# Assign the titled widget back to p_pca; otherwise the title is only shown
# here and is missing from the copy of p_pca that is saved to HTML afterwards.
p_pca <- plotly::layout(p_pca, title = "Archetypes for Top 10 PCs")
p_pca
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
# Write the current p_pca widget to an HTML file.
htmlwidgets::saveWidget(
  widget = p_pca,
  file = "../../figures/ParetoTI/robust_archetypes.html"
)
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
# 3D archetype plot with per-cell NEUROD2 expression passed as data_lab.
# NOTE(review): this block reads the `scale.data` slot, whereas the YAP1,
# POU2F3 and ASCL1 plots read `data` -- confirm that is intentional.
p_pca <- plot_arc(arc_data = arc_ave, data = x_pca,
                  which_dimensions = 1:3, line_size = 1.5,
                  data_lab = as.numeric(data[['RNA']]@scale.data['NEUROD2',]),
                  text_size = 60, data_size = 3)
# Assign the titled widget back to p_pca; otherwise the title is only shown
# here and is missing from the copy of p_pca that is saved to HTML afterwards.
p_pca <- plotly::layout(p_pca, title = "Expression of NEUROD2 in PCA")
p_pca
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
# Write the current p_pca widget to an HTML file.
# NOTE(review): the extension is upper-case ".HTML" here but lower-case
# ".html" for the other saved widgets -- confirm before renaming.
htmlwidgets::saveWidget(
  widget = p_pca,
  file = "../../figures/ParetoTI/NEUROD2.HTML"
)
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
# 3D archetype plot with per-cell YAP1 expression passed as data_lab.
p_pca <- plot_arc(arc_data = arc_ave, data = x_pca,
                  which_dimensions = 1:3, line_size = 1.5,
                  data_lab = as.numeric(data[['RNA']]@data['YAP1',]),
                  text_size = 60, data_size = 3)
# Assign the titled widget back to p_pca; otherwise the title is only shown
# here and is missing from the copy of p_pca that is saved to HTML afterwards.
p_pca <- plotly::layout(p_pca, title = "Expression of YAP1 in PCA")
p_pca
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
# Write the current p_pca widget to an HTML file.
htmlwidgets::saveWidget(
  widget = p_pca,
  file = "../../figures/ParetoTI/YAP1.html"
)
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
# 3D archetype plot with per-cell POU2F3 expression passed as data_lab.
p_pca <- plot_arc(arc_data = arc_ave, data = x_pca,
                  which_dimensions = 1:3, line_size = 1.5,
                  data_lab = as.numeric(data[['RNA']]@data['POU2F3',]),
                  text_size = 60, data_size = 3)
# Assign the titled widget back to p_pca; otherwise the title is only shown
# here and is missing from the copy of p_pca that is saved to HTML afterwards.
p_pca <- plotly::layout(p_pca, title = "Expression of POU2F3 in PCA")
p_pca
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
# Write the current p_pca widget to an HTML file.
htmlwidgets::saveWidget(
  widget = p_pca,
  file = "../../figures/ParetoTI/POU2F3.html"
)
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
# 3D archetype plot with per-cell ASCL1 expression passed as data_lab.
p_pca <- plot_arc(arc_data = arc_ave, data = x_pca,
                  which_dimensions = 1:3, line_size = 1.5,
                  data_lab = as.numeric(data[['RNA']]@data['ASCL1',]),
                  text_size = 60, data_size = 3)
# Assign the titled widget back to p_pca; otherwise the title is only shown
# here and is missing from the copy of p_pca that is saved to HTML afterwards.
p_pca <- plotly::layout(p_pca, title = "Expression of ASCL1 in PCA")
p_pca
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
# Write the current p_pca widget to an HTML file.
htmlwidgets::saveWidget(
  widget = p_pca,
  file = "../../figures/ParetoTI/ASCL1.html"
)
No trace type specified:
Based on info supplied, a 'scatter3d' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
Archetypes: 1. N (H524) 2. A/N (CORL279) 3. A2 (DMS53) 4. Y (H841) 5. A (H69) 6. A2 (DMS454)

# Determine enriched genes and gene sets to define archetypes
This will be especially interesting for comparing archetypes 4 and 5: the shape of the data is clearly not well defined without an archetype at 5, yet not many cells actually lie close to it. We use the ParetoTI package to evaluate gene sets enriched at each location.
# Summarise the top genes and gene sets for each archetype, using a 0.05
# cutoff on the FDR-adjusted "wilcoxon_p_val" metric and ordering by
# decreasing "mean_diff".
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned elsewhere in the session.
labs <- get_top_decreasing(
  summary_genes = enriched_genes,
  summary_sets = enriched_sets,
  cutoff_genes = 0.05,
  cutoff_sets = 0.05,
  cutoff_metric = "wilcoxon_p_val",
  p.adjust.method = "fdr",
  order_by = "mean_diff",
  order_decreasing = TRUE
)
-- archetype_1
SNTB1, SUSD2, MYC, CRABP1
HMGB1, JPH4, SOX11, GABBR2
NPW, RPL38, HMOX1, RPL21
heart_growth
T_helper_cell_differentiation
interleukin_13_production
-- archetype_2
BASP1, FAM91A1, MEST, DLK1
HEPACAM2, NHLH2, ISL1, INSM1
RAI14, MEX3D, CDKN2C, PEG10
regulation_of_interleukin_13_production
regulation_of_cardiac_muscle_tissue_growth
negative_regulation_of_CD4_positive__alpha_beta_T_cell_differentiation
-- archetype_3
SST, PCSK1, UCP2, SPINK1
FAM178B, CCND2, SEC11C, SCNN1A
EGFL7, CYP1A1, TFF3, EPCAM
determination_of_bilateral_symmetry
atrial_septum_morphogenesis
signal_transduction_involved_in_G2_DNA_damage_checkpoint
-- archetype_4
CAV1, SGK1, CTGF, CLDN6
SERPINH1, LPHN2, CAV2, ENC1
PXDN, PTRF, MID1, FNIP2
metanephric_epithelium_development
exosomal_secretion
atrial_septum_morphogenesis
-- archetype_5
NR2F1, COL2A1, ZIC2, NELL1
FABP7, CALB1, CDKN1A, MAP1B
DLX6-AS1, RALYL, SPOCK1, PCDH9
regulation_of_glomerular_mesangial_cell_proliferation
glomerulus_vasculature_development
atrial_septum_development
-- archetype_6
PCSK2, DSP, MPP1-1, ASCL1
TDRD1, MIAT, UGDH, POU4F2
G6PD-1, CLDN18, OR51E2, GUCY2C
extracellular_exosome_biogenesis
cerebral_cortex_GABAergic_interneuron_differentiation
positive_regulation_of_CD4_positive__alpha_beta_T_cell_activation
The format we need the enrichment file in is a csv with the columns: archetype #,Feature Name,P value (Mann-Whitney),Median Difference,Mean Difference,Significant after Benjamini-Hochberg correction?,Is first bin maximal?
# Upper-case the feature names. `Feature Name` is a character vector, so
# toupper() is applied to it directly; mutate_all() expects a data frame and
# fails on a character vector ("no applicable method for 'tbl_vars'").
enriched_sets$`Feature Name` <- toupper(enriched_sets$`Feature Name`)
Error in UseMethod("tbl_vars") :
no applicable method for 'tbl_vars' applied to an object of class "character"
We find the vector associated with this shift by taking the difference of the averages of each cell line in PCA space (or archetype space). This should result in a vector pointing from one average to the other. We can then reconstruct these vectors in gene space to determine which genes are playing a major role in the difference between the two populations.
# Capture the session details (via devtools) and display them.
session_info. <- devtools::session_info()
session_info.